#######################################################
# This script transforms the co-sponsorship raw data
# into a dyadic format. You find the input data in 
# "rawdata_short.csv", which is a copy of
# "rawdata_assembly.dta" without the information on 
# political ideology and without the control variables.
# The output file "cooperation_long.dta" is identical 
# to "withtopics.dta"- just without the controls.
# Running this transformation will take some hours.
#######################################################

# Load the required packages, installing any that are missing.
# requireNamespace() is used for the presence check: the R documentation of
# installed.packages() notes that it is slow and should not be used to find
# out whether a single named package is installed.
  # tidyverse (provides read_csv, the dplyr/tidyr verbs and %>% used below)
  if (!requireNamespace("tidyverse", quietly = TRUE)) {
    install.packages("tidyverse")
  }
  library(tidyverse)
  # haven is required for producing STATA output
  if (!requireNamespace("haven", quietly = TRUE)) {
    install.packages("haven")
  }
  library(haven)

# Set working Directory.
# !!! make sure the specified working directory 
# !!! contains all raw data


# import the raw data; it is referred to as "df" in the rest of the script
df <- read_csv("rawdata_short.csv")

#######################################################
# generate/extract important background variables
#######################################################

# every distinct country code that occurs in the data
ccode <- unique(df[["ccode"]])

# one row per unordered pair of country codes (columns V1 and V2)
combs <- as.data.frame(t(combn(ccode, 2)))

# number the dyads: the row names ("1", "2", ...) serve as dyad identifier
combs[["dyad"]] <- row.names(combs)

#######################################################
# creating dyad specific datasets (dyad data structure).
#     output is a list object with every element 
#     containing a dataset for each dyad.
#######################################################
dyads <- list() # gen empty list object
# set progress bar
pb <- txtProgressBar(min = 0, max = nrow(combs), style = 3)
# iterate over all possible dyad combinations
# (seq_len() is the safe idiom; 1:nrow() would count 1, 0 for zero rows)
for (i in seq_len(nrow(combs))) {
  # extract members
  ccode1 <- combs[i, 1]
  ccode2 <- combs[i, 2]
  # extract dyad number
  dyad <- combs[i, 3]

  # subset data, store in temporary object.
  # Inside subset() "ccode" refers to the column of df, whereas "ccode1" and
  # "ccode2" are the two values extracted above.
  temp <- df %>%
    subset(ccode == ccode1 | ccode == ccode2) %>%
    mutate(dyad = dyad,
           ccode1 = ccode1,
           ccode2 = ccode2)
  # spread data (make cooperation on each draft resolution one observation)
  temp <- temp %>%
    spread(ccode, author)

  # rename the two country specific author columns to "author1" and "author2".
  # spread() places the new columns after all remaining ones, so they are
  # addressed as "the last two columns" instead of the fixed positions 9:10,
  # which only held for one particular number of columns in the raw data.
  # Their order follows spread()'s ordering of the country codes; the counting
  # steps further down treat author1 and author2 symmetrically.
  colnames(temp)[ncol(temp) - 1:0] <- paste0("author", 1:2)

  # name data.frame
  name <- paste0(ccode1, "-", ccode2)
  dyads[[name]] <- temp

  # update progress bar
  setTxtProgressBar(pb, i)
}
# finish the progress bar output
close(pb)
# save data, so in case of a crash nothing gets lost
save(dyads, file = "dyads_dyad.RData")

#######################################################
# counting cooperation on topics
#######################################################
load("dyads_dyad.RData")

# The progress bar is sized from the loaded list (not from "combs"), so this
# section also runs in a fresh session that only has the saved file.
pb <- txtProgressBar(min = 0, max = length(dyads), style = 3)
# Results are collected in a pre-allocated list and bound once after the loop;
# growing the result with rbind() in every iteration copies it each time.
dyad_counts <- vector("list", length(dyads))
for (i in seq_along(dyads)) {
  # split dyad specific dataset by year (for year specific counts)
  by_year <- split(dyads[[i]], dyads[[i]]$year)

  # build one output row per year of this dyad
  yearly <- lapply(by_year, function(x) {
    # count how many members of the dyad are listed as authors
    n_authors <- x %>%
      select(author1, author2) %>%
      rowSums()
    # dummy: TRUE if both dyad members were listed as authors
    joint <- n_authors == 2
    # draft resolutions authored by both members (subset() drops NA rows)
    coauthored <- subset(x, joint)

    # count cooperations separated by topic.
    # all.x = TRUE keeps exactly the five rows for topics 1 to 5, so the
    # column naming below always fits; topics without any cooperation stay NA.
    coop <- coauthored %>% count(topic)
    coop <- merge.data.frame(x = data.frame(topic = 1:5),
                             y = coop, by = "topic", all.x = TRUE)

    # transpose (spread output): one column per topic
    coop <- data.frame(t(coop[, 2]))
    colnames(coop) <- paste0("topic", 1:5) # name topics

    # add variables
    # year
    coop$year <- x %>% pull(year) %>% unique()
    # dyad member 1 and dyad member 2
    coop$ccode1 <- x %>% pull(ccode1) %>% unique()
    coop$ccode2 <- x %>% pull(ccode2) %>% unique()
    # give dyad a specific name
    coop$dyad <- paste(coop$ccode1, "-", coop$ccode2)
    # names of draft resolutions where dyad cooperated, separated by ";".
    # The symbols are pulled out as a vector first: pasting the one-column
    # data frame itself would yield its deparsed form (e.g. 'c("a", "b")').
    symbols <- coauthored %>% pull(symbol)
    coop$cooperation <- if (length(symbols) > 0) {
      paste0(symbols, collapse = ";")
    } else {
      NA_character_
    }
    coop
  })

  # bind yearly output to a single data frame for this dyad
  dyad_counts[[i]] <- bind_rows(yearly)

  # update progress bar
  setTxtProgressBar(pb, i)
}
# finish the progress bar output
close(pb)

# bind dyad specific datasets to complete dataset
cooperation <- bind_rows(dyad_counts)
# save file
save(cooperation, file = "cooperation.RData")

#######################################################
# counting cooperation on topics 
# (in Human Rights Committee)
#######################################################
load("dyads_dyad.RData")

# The progress bar is sized from the loaded list (not from "combs"), so this
# section also runs in a fresh session that only has the saved file.
pb <- txtProgressBar(min = 0, max = length(dyads), style = 3)
# Results are collected in a pre-allocated list and bound once after the loop;
# growing the result with rbind() in every iteration copies it each time.
dyad_counts_HRC <- vector("list", length(dyads))
for (i in seq_along(dyads)) {
  # split dyad specific dataset by year (for year specific counts)
  by_year <- split(dyads[[i]], dyads[[i]]$year)

  # build one output row per year of this dyad
  yearly <- lapply(by_year, function(x) {
    # count how many members of the dyad are listed as authors
    n_authors <- x %>%
      select(author1, author2) %>%
      rowSums()
    # dummy: TRUE if both dyad members were listed as authors and the
    # observation is not flagged HRC == 0
    joint <- n_authors == 2 & x$HRC != 0
    # qualifying draft resolutions (subset() drops NA rows)
    coauthored <- subset(x, joint)

    # count cooperations separated by topic.
    # all.x = TRUE keeps exactly the five rows for topics 1 to 5, so the
    # column naming below always fits; topics without any cooperation stay NA.
    coop <- coauthored %>% count(topic)
    coop <- merge.data.frame(x = data.frame(topic = 1:5),
                             y = coop, by = "topic", all.x = TRUE)

    # transpose (spread output): one column per topic
    coop <- data.frame(t(coop[, 2]))
    colnames(coop) <- paste0("topic", 1:5) # name topics

    # add variables
    # year
    coop$year <- x %>% pull(year) %>% unique()
    # dyad member 1 and dyad member 2
    coop$ccode1 <- x %>% pull(ccode1) %>% unique()
    coop$ccode2 <- x %>% pull(ccode2) %>% unique()
    # give dyad a specific name
    coop$dyad <- paste(coop$ccode1, "-", coop$ccode2)
    # names of draft resolutions where dyad cooperated, separated by ";".
    # The symbols are pulled out as a vector first: pasting the one-column
    # data frame itself would yield its deparsed form (e.g. 'c("a", "b")').
    symbols <- coauthored %>% pull(symbol)
    coop$cooperation <- if (length(symbols) > 0) {
      paste0(symbols, collapse = ";")
    } else {
      NA_character_
    }
    coop
  })

  # bind yearly output to a single data frame for this dyad
  dyad_counts_HRC[[i]] <- bind_rows(yearly)

  # update progress bar
  setTxtProgressBar(pb, i)
}
# finish the progress bar output
close(pb)

# bind dyad specific datasets to complete dataset
cooperation_HRC <- bind_rows(dyad_counts_HRC)
# save file
save(cooperation_HRC, file = "cooperation_HRC.RData")

#######################################################
# Reshape data to long format
#######################################################

# cooperation Dataset long: one row per dyad, year and topic
cooperation_long <- gather(cooperation, key = "topic", value = "n", topic1:topic5)
cooperation_HRC_long <- gather(cooperation_HRC, key = "topic_HRC", value = "n_HRC", topic1:topic5)

# The HRC counts are attached by row position below, so make sure that both
# long tables describe the same dyad/year/topic in every row. Without this
# check a mismatch would silently pair counts with the wrong observation.
stopifnot(
  nrow(cooperation_long) == nrow(cooperation_HRC_long),
  all(cooperation_long$dyad == cooperation_HRC_long$dyad),
  all(cooperation_long$year == cooperation_HRC_long$year),
  all(cooperation_long$topic == cooperation_HRC_long$topic_HRC)
)

# bind datasets
cooperation_long <- cbind(cooperation_long, n_HRC = cooperation_HRC_long$n_HRC)

#######################################################
# Make final adjustments to the variables
#######################################################

# Each data set goes through the same three steps:
#   1. drop the "cooperation" variable for .dta
#        Error: Value [of column] was longer than the available
#        storage size of the specified column.
#   2. sort the columns
#   3. rename the variables

# wide data set
cooperation <- select(cooperation, -cooperation)
cooperation <- cooperation[, c("year", "ccode1", "ccode2", "dyad", paste0("topic", 1:5))]
colnames(cooperation) <- c("year", "ccode1", "ccode2", "dyad", paste0("S", 1:5))

# wide data set, Human Rights Committee
cooperation_HRC <- select(cooperation_HRC, -cooperation)
cooperation_HRC <- cooperation_HRC[, c("year", "ccode1", "ccode2", "dyad", paste0("topic", 1:5))]
colnames(cooperation_HRC) <- c("year", "ccode1", "ccode2", "dyad", paste0("Shrc", 1:5))

# long data set
cooperation_long <- select(cooperation_long, -cooperation)
cooperation_long <- cooperation_long[, c("year", "ccode1", "ccode2", "dyad", "topic", "n", "n_HRC")]
colnames(cooperation_long) <- c("year", "ccode1", "ccode2", "dyad", "topic", "S", "Shrc")

# save data finally
save(cooperation, file = "cooperation.RData")
save(cooperation_HRC, file = "cooperation_HRC.RData")
save(cooperation_long, file = "cooperation_long.RData")

#######################################################
# Write Data to .dta format
#######################################################

# Each saved data set is read back in and exported to stata right away.

# wide data set
load("cooperation.RData")
write_dta(cooperation, "cooperation.dta")

# wide data set, Human Rights Committee
load("cooperation_HRC.RData")
write_dta(cooperation_HRC, "cooperation_HRC.dta")

# long data set
load("cooperation_long.RData")
write_dta(cooperation_long, "cooperation_long.dta")





